#!/usr/bin/env bash
# Launch a vLLM OpenAI-compatible server for the Qwen3-8B-AWQ model with a
# semantic-task LoRA adapter attached and a non-thinking chat template.
#
# Environment overrides:
#   MODEL_PATH - base model directory (default: local modelscope cache)
#   LORA_PATH  - LoRA adapter directory
#   PORT       - listening port (default: 10086)
set -euo pipefail

readonly MODEL_PATH="${MODEL_PATH:-/home/renchong/.cache/modelscope/hub/models/Qwen/Qwen3-8B-AWQ}"
readonly LORA_PATH="${LORA_PATH:-/home/renchong/workspace/LLaMA-Factory/saves/Qwen3-8B-Instruct-AWQ/lora/train_2025-07-31-18-04-46}"
readonly PORT="${PORT:-10086}"

# NOTE(review): 0.2 GPU memory utilization is unusually low — presumably the
# GPU is shared with other processes; confirm before raising.
vllm serve "$MODEL_PATH" \
  --max-model-len 1024 \
  --port "$PORT" \
  --served-model-name semantic_model \
  --gpu-memory-utilization 0.2 \
  --enable-lora \
  --lora-modules "lora_semantic=$LORA_PATH" \
  --chat-template ./qwen3_nonthinking_semantic.jinja

